Goals

  1. Load in phyloseq data with rooted tree.
  2. Evaluate sequencing depth and remove sample.
  3. Normalize the read counts between samples.
  4. Calculate community dissimilarities. Numbers between 0 and 1. If 0, completely similar versus if they are 1, then they are completely dissimilar.
    1. Sorensen: Presence/ Absence. Weighted by number of shared taxa. Shared species as a binary-valye. Abundance-unweighted.
    2. Bray-Curtis: Relative abundance. Weighted by number of shared taxa. Shared abundant species: abundance weighted.
    3. (Abundance) Weighted UNIFRAC: Consider abundant species and where they fall on the tree.
  5. Visualize the community data with two unconstricted ordinations:
    1. PCoA: Linear method. Uses matrix algebra to calculate eigenvalye. Calculate how much variation is explained by each axis. Can choose to view axis 1, 2, 3, etc. and plot them together.
    2. NMDS: Non-linear method. Collapse multiple axes into two (or three) dimensions. Can see more axes of variation into fewer axes. Always need to report a stress value. (Ideally less than 0.15)
  6. Run statistics with PERMANOVA and betadispR.

Setup

Load Libraries

# Install packages if needed

pacman::p_load(tidyverse, devtools, phyloseq, patchwork, vegan, ggpubr, rstatix,
               ggplot2,install = FALSE)

Load in colors

host_species_colors <- c(
  "Acanthurus nigros" = "dodgerblue",
  "Acanthurus achilles" = "royalblue",
  "Acanthurus nigrofuscus" = "blue3",
  "Acanthurus olivaceus" = "blue",
  "Acanthurus triostegus" = "slateblue3",
  "Ctenochaetus striatus" = "olivedrab",
  "Naso brevirostris" = "red2",
  "Naso lituratus" = "tomato4",
  "Naso tonganus" = "salmon2",
  "Naso unicornis" = "deeppink",
  "Odax pullus" = "springgreen2",
  "Zebrasoma flavescens" = "darkorange",
  "Zebrasoma scopas" = "yellow2",
  "Zebrasoma velifer" = "goldenrod")

region_colors <- c(
  "Cook Islands" = "olivedrab4",
  "Hawaii" = "deeppink",
  GBR = "royalblue3",
  Captivity = "brown")

gut_section_colors <- c(
  III = "darkslateblue",
  IV = "darkorchid1",
  V = "cyan3",
  feces = "cornflowerblue")

diet_colors <- c(
  herbivore = "forestgreen",
  detritovore = "hotpink4",
  omnivore = "orange")

Load in data

# Load in rooted phylogenetic tree!
load("data/07_Phylogenetic_Tree_epulos/phytree_preprocessed_physeq.RData")
unrooted_physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1112 taxa and 83 samples ]
## sample_data() Sample Data:       [ 83 samples by 10 sample variables ]
## tax_table()   Taxonomy Table:    [ 1112 taxa by 9 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1112 tips and 1110 internal nodes ]
midroot_physeq
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 1112 taxa and 83 samples ]
## sample_data() Sample Data:       [ 83 samples by 10 sample variables ]
## tax_table()   Taxonomy Table:    [ 1112 taxa by 9 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 1112 tips and 1111 internal nodes ]

Explore Read Counts

Raw Read Depth

# Sequence depth will inform us on how we want to normalize our data
# Calculate the total number of reads per sample
raw_total_seqs_df <-
  unrooted_physeq %>%
  # Calculate the sample read sums
  sample_sums() %>%
  data.frame()

# Name the column
colnames(raw_total_seqs_df)[1] <- "TotalSeqs"

head(raw_total_seqs_df)
##     TotalSeqs
## A16     15903
## A17     20099
## A18      1464
## AA1      2920
## AA2      9125
## AA3     10225
# Make a histogram of raw reads
raw_seqs_histogram <-
  raw_total_seqs_df %>%
  ggplot(aes(x = TotalSeqs)) +
  geom_histogram(bins = 50) +
  scale_x_continuous(limits = c(0, 50000)) +
  labs(title = "Raw Sequencing Depth Distribution") + 
  theme_bw()

Remove lowly sequenced sample

Normalize Read Counts

### scale_reads function and matround function
#################################################################################### 
# Function to scale reads: http://deneflab.github.io/MicrobeMiseq/ 
# Scales reads by 
# 1) taking proportions
# 2) multiplying by a given library size of n
# 3) rounding 
# Default for n is the minimum sample size in your library
# Default for round is floor

matround <- function(x){trunc(x+0.5)}

scale_reads <- function(physeq, n = min(sample_sums(physeq)), round = "round") {
  
  # transform counts to n
  physeq.scale <- transform_sample_counts(physeq, function(x) {(n * x/sum(x))})
  
  # Pick the rounding functions
  if (round == "floor"){
    otu_table(physeq.scale) <- floor(otu_table(physeq.scale))
  } else if (round == "round"){
    otu_table(physeq.scale) <- round(otu_table(physeq.scale))
  } else if (round == "matround"){
    otu_table(physeq.scale) <- matround(otu_table(physeq.scale))
  }
  
  # Prune taxa and return new phyloseq object
  physeq.scale <- prune_taxa(taxa_sums(physeq.scale) > 0, physeq.scale)
  return(physeq.scale)
  
  }

Rescale all reads so they all represent the count of the lowest number of sequence reads. We will expect each sample to have # of reads around 2200

This is where one might decide to use rarefaction to normalize the data.

Scale reads and check the distribution of the seq depth

min(sample_sums(midroot_physeq))
## [1] 559
# Scale reads by the above function
scaled_rooted_physeq <-
  midroot_physeq %>%
  scale_reads(round = "matround")

# Calculate read depth
## Look at total number of sequences in each sample and compare to what we had before

scaled_total_seqs_df <- 
  scaled_rooted_physeq %>%
  sample_sums() %>%
  data.frame()

head(scaled_total_seqs_df)
##       .
## A16 555
## A17 562
## A18 561
## AA1 559
## AA2 556
## AA3 554
# Change first column name to be "TotalSeqs"
colnames(scaled_total_seqs_df)[1] <- "TotalSeqs"

# Inspect
head(scaled_total_seqs_df)
##     TotalSeqs
## A16       555
## A17       562
## A18       561
## AA1       559
## AA2       556
## AA3       554
# Check range of data
min_seqs <-
  min(scaled_total_seqs_df)
max_seqs <-
 max(scaled_total_seqs_df)
# Range of seqs
max_seqs - min_seqs
## [1] 11
# Plot histogram
scaled_total_seqs_df %>%
  ggplot(aes(x = TotalSeqs)) +
  geom_histogram(bins = 50) +
  scale_x_continuous(limits = c(0, 600)) +
  labs(title = "Scaled Sequencing Depth at 555") + 
  theme_bw()
## Warning: Removed 2 rows containing missing values (`geom_bar()`).

head(scaled_total_seqs_df)
##     TotalSeqs
## A16       555
## A17       562
## A18       561
## AA1       559
## AA2       556
## AA3       554

Calculate & Visualize Community Dissimiliarity

Exploratory analyses from the Paily & Shankar (2016) paper, which is using unconstrained ordination methods like PCoA.

Sorenson PCoA

# Calculate sorenson dissimularity: Abundance-unweighted of shared taxa
scaled_soren_pcoa <-  
  ordinate(
    physeq = scaled_rooted_physeq,
    method = "PCoA",
    distance = "bray", binary = TRUE)

#str(scaled_soren_pcoa)

# Plot the ordination host species
soren_diet_pcoa <- plot_ordination(
  physeq = scaled_rooted_physeq,
  ordination = scaled_soren_pcoa,
  color = "diet",
  title = "Sorensen PCoA") +
  geom_point(size=5, alpha = 0.5, aes(color = diet)) +
  scale_color_manual(values = diet_colors) + 
  theme_bw()
# Show the plot 
soren_diet_pcoa

Bray-Curtis PCoA

# Calculate the BC distance
scaled_BC_pcoa <- 
  ordinate(
    physeq = scaled_rooted_physeq,
    method = "PCoA",
    distance = "bray")

# Plot the PCoA
bray_diet_pcoa <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_BC_pcoa,
    color = "diet",
    title = "Bray-Curtis PCoA") +
  geom_point(size=5, alpha = 0.5, aes(color = diet)) +
  scale_color_manual(values = c(diet_colors)) + 
  theme_bw()
bray_diet_pcoa

Weighted-Unifrac PCoA

# Calculate the BC distance
scaled_wUNI_pcoa <- 
  ordinate(
    physeq = scaled_rooted_physeq,
    method = "PCoA",
    distance = "wunifrac")

# Plot the PCoA
wUNI_diet_pcoa <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_wUNI_pcoa,
    color = "diet",
    title = "Weighted Unifrac PCoA") +
  geom_point(size=5, alpha = 0.5, aes(color = diet)) +
  scale_color_manual(values = c(diet_colors)) + 
  theme_bw()
wUNI_diet_pcoa

Combine PCoAs

Let’s plot all three together into one plot to have a concise visualization of the three metrics.

(soren_diet_pcoa + theme(legend.position = "none")) + 
  (bray_diet_pcoa + theme(legend.position = "none")) + 
    (wUNI_diet_pcoa + theme(legend.position = "none"))

NMDS

Weighted Unifrac

Since we did 3 of the dissimilarity metrics for the PCoA, let’s just plot one example of them for the NMDS plotting. Here, we will use weighted Unifrac

# Calculate the Weighted Unifrac distance
scaled_wUNI_nmds <- 
  ordinate(
    physeq = scaled_rooted_physeq,
    method = "NMDS",
    distance = "wunifrac")
## Run 0 stress 0.1304584 
## Run 1 stress 0.1550611 
## Run 2 stress 0.1322358 
## Run 3 stress 0.1861179 
## Run 4 stress 0.1811754 
## Run 5 stress 0.1415828 
## Run 6 stress 0.1864384 
## Run 7 stress 0.1467709 
## Run 8 stress 0.1653355 
## Run 9 stress 0.1604274 
## Run 10 stress 0.1566682 
## Run 11 stress 0.155879 
## Run 12 stress 0.1659982 
## Run 13 stress 0.1539487 
## Run 14 stress 0.166497 
## Run 15 stress 0.1449272 
## Run 16 stress 0.1805283 
## Run 17 stress 0.163302 
## Run 18 stress 0.1322358 
## Run 19 stress 0.1748714 
## Run 20 stress 0.1372046 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     14: stress ratio > sratmax
##      6: scale factor of the gradient < sfgrmin
# Plot the PCoA
wUNI_host_species_nmds <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_wUNI_nmds,
    color = "host_species") +
  geom_point(size=5, alpha = 0.5, aes(color = host_species)) +
  scale_color_manual(values = c(host_species_colors)) + 
  theme_bw() + labs(color = "host_species")
wUNI_host_species_nmds

wUNI_gut_section_nmds <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_wUNI_nmds,
    color = "gut_section") +
  geom_point(size=5, alpha = 0.5, aes(color = gut_section)) +
  scale_color_manual(values = c(gut_section_colors)) + 
  theme_bw() + labs(color = "gut_section")
wUNI_gut_section_nmds

wUNI_region_nmds <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_wUNI_nmds,
    color = "region") +
  geom_point(size=5, alpha = 0.5, aes(color = region)) +
  scale_color_manual(values = c(region_colors)) + 
  theme_bw() + labs(color = "region")
wUNI_region_nmds

wUNI_diet_nmds <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_wUNI_nmds,
    color = "diet") +
  geom_point(size=5, alpha = 0.5, aes(color = diet)) +
  scale_color_manual(values = c(diet_colors)) + 
  theme_bw() + labs(color = "diet")
wUNI_diet_nmds

wUNI_diet_gut_section_nmds <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_wUNI_nmds,
    color = "diet",
    shape = "gut_section") +
  geom_point(size=5, alpha = 0.5, aes(color = diet)) +
  scale_shape_manual(values = c(0,2,15,16,17,18)) +
  scale_color_manual(values = c(diet_colors)) + 
  theme_bw() + labs(color = "diet")
wUNI_diet_gut_section_nmds

wUNI_gut_section_diet_nmds <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_wUNI_nmds,
    color = "gut_section",
    shape = "diet") +
  geom_point(size=5, alpha = 0.5, aes(color = gut_section)) +
  scale_shape_manual(values = c(15,16,17)) +
  scale_color_manual(values = c(gut_section_colors)) + 
  theme_bw() + labs(color = "gut_section")
wUNI_gut_section_diet_nmds

wUNI_diet_region_nmds <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_wUNI_nmds,
    color = "diet",
    shape = "region") +
  geom_point(size=5, alpha = 0.5, aes(color = diet)) +
  scale_shape_manual(values = c(15,16,17,18,19)) +
  scale_color_manual(values = c(diet_colors)) + 
  theme_bw() + labs(color = "diet")
wUNI_diet_region_nmds

wUNI_region_diet_nmds <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_wUNI_nmds,
    color = "region",
    shape = "diet") +
  geom_point(size=5, alpha = 0.5, aes(color = region)) +
  scale_shape_manual(values = c(15,16,17,18)) +
  scale_color_manual(values = c(region_colors)) + 
  theme_bw() + labs(color = "region")
wUNI_region_diet_nmds

wUNI_host_species_diet_nmds <- 
  plot_ordination(
    physeq = scaled_rooted_physeq,
    ordination = scaled_wUNI_nmds,
    color = "host_species",
    shape = "diet") +
  geom_point(size=5, alpha = 0.5, aes(color = host_species)) +
  scale_shape_manual(values = c(15, 16, 17, 18)) +
  scale_color_manual(values = c(host_species_colors)) + 
  theme_bw() + labs(color = "host_species")
wUNI_host_species_diet_nmds

Statistical Significance Testing

PERMANOVA

# Calculate all three of the distance matrices
scaled_sorensen_dist <- phyloseq::distance(scaled_rooted_physeq, method = "bray", binary = TRUE)
scaled_bray_dist <- phyloseq::distance(scaled_rooted_physeq, method = "bray")
scaled_wUnifrac_dist <- phyloseq::distance(scaled_rooted_physeq, method = "wunifrac")

# make a data frame from the sample_data
# All distance matrices will be the same metadata because they 
# originate from the same phyloseq object. 
metadata <- data.frame(sample_data(scaled_rooted_physeq))

# Adonis test
# In this example we are testing the hypothesis that the five stations
# that were collected have different centroids in the ordination space 
# for each of the dissimilarity metrics, we are using a discrete variable 

# diet
adonis2(scaled_sorensen_dist ~ diet, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_sorensen_dist ~ diet, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## diet      2   3.3465 0.15627 7.4086  0.001 ***
## Residual 80  18.0683 0.84373                  
## Total    82  21.4148 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_bray_dist ~ diet, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_bray_dist ~ diet, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## diet      2   4.4627 0.15375 7.2671  0.001 ***
## Residual 80  24.5640 0.84625                  
## Total    82  29.0268 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_wUnifrac_dist ~ diet, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_wUnifrac_dist ~ diet, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## diet      2   2.6494 0.16349 7.8178  0.001 ***
## Residual 80  13.5557 0.83651                  
## Total    82  16.2051 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# region
adonis2(scaled_sorensen_dist ~ region, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_sorensen_dist ~ region, data = metadata)
##          Df SumOfSqs      R2      F Pr(>F)    
## region    3   2.0363 0.09509 2.7671  0.001 ***
## Residual 79  19.3785 0.90491                  
## Total    82  21.4148 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_bray_dist ~ region, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_bray_dist ~ region, data = metadata)
##          Df SumOfSqs      R2     F Pr(>F)    
## region    3    2.405 0.08286 2.379  0.001 ***
## Residual 79   26.622 0.91714                 
## Total    82   29.027 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_wUnifrac_dist ~ region, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_wUnifrac_dist ~ region, data = metadata)
##          Df SumOfSqs      R2     F Pr(>F)   
## region    3   1.5085 0.09309 2.703  0.002 **
## Residual 79  14.6965 0.90691                
## Total    82  16.2051 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# gut section
adonis2(scaled_sorensen_dist ~ gut_section, data = metadata) # 0.559
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_sorensen_dist ~ gut_section, data = metadata)
##             Df SumOfSqs      R2      F Pr(>F)
## gut_section  5   1.2577 0.05873 0.9609  0.561
## Residual    77  20.1571 0.94127              
## Total       82  21.4148 1.00000
adonis2(scaled_bray_dist ~ gut_section, data = metadata) # 0.0438
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_bray_dist ~ gut_section, data = metadata)
##             Df SumOfSqs      R2      F Pr(>F)
## gut_section  5   1.7897 0.06166 1.0119  0.434
## Residual    77  27.2371 0.93834              
## Total       82  29.0268 1.00000
adonis2(scaled_wUnifrac_dist ~ gut_section, data = metadata) # 0.809
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_wUnifrac_dist ~ gut_section, data = metadata)
##             Df SumOfSqs      R2      F Pr(>F)
## gut_section  5   0.7348 0.04535 0.7315  0.807
## Residual    77  15.4702 0.95465              
## Total       82  16.2051 1.00000
# host species
adonis2(scaled_sorensen_dist ~ host_species, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_sorensen_dist ~ host_species, data = metadata)
##              Df SumOfSqs      R2      F Pr(>F)    
## host_species 13   11.117 0.51911 5.7295  0.001 ***
## Residual     69   10.298 0.48089                  
## Total        82   21.415 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_bray_dist ~ host_species, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_bray_dist ~ host_species, data = metadata)
##              Df SumOfSqs     R2     F Pr(>F)    
## host_species 13   15.140 0.5216 5.787  0.001 ***
## Residual     69   13.886 0.4784                 
## Total        82   29.027 1.0000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_wUnifrac_dist ~ host_species, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_wUnifrac_dist ~ host_species, data = metadata)
##              Df SumOfSqs      R2      F Pr(>F)    
## host_species 13   8.5083 0.52504 5.8673  0.001 ***
## Residual     69   7.6968 0.47496                  
## Total        82  16.2051 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Note that:

  • R2 = the percent variation explained.
  • F = the F-Statistic, which represents the importance value.
  • Pr(>F) = the pvalue
# We might also care about other variables
# Here, we will add date and fraction as variables
# multiplicative model ORDER MATTERS! 
adonis2(scaled_wUnifrac_dist ~ diet * region, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_wUnifrac_dist ~ diet * region, data = metadata)
##             Df SumOfSqs      R2      F Pr(>F)    
## diet         2   2.6494 0.16349 8.7188  0.001 ***
## region       3   1.3516 0.08341 2.9654  0.001 ***
## diet:region  2   0.8090 0.04992 2.6622  0.012 *  
## Residual    75  11.3951 0.70318                  
## Total       82  16.2051 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_wUnifrac_dist ~ diet * gut_section, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_wUnifrac_dist ~ diet * gut_section, data = metadata)
##                  Df SumOfSqs      R2      F Pr(>F)    
## diet              2   2.6494 0.16349 7.7321  0.001 ***
## gut_section       5   0.6696 0.04132 0.7817  0.761    
## diet:gut_section  5   0.8935 0.05514 1.0430  0.390    
## Residual         70  11.9926 0.74005                  
## Total            82  16.2051 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis2(scaled_wUnifrac_dist ~ diet * host_species, data = metadata)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = scaled_wUnifrac_dist ~ diet * host_species, data = metadata)
##              Df SumOfSqs      R2       F Pr(>F)    
## diet          2   2.6494 0.16349 11.8755  0.001 ***
## host_species 11   5.8589 0.36155  4.7749  0.001 ***
## Residual     69   7.6968 0.47496                   
## Total        82  16.2051 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We can also run tests that include additive (+) or multipliciatve models, which include the interaction term between variables.

BetaDispR

The PERMANOVA is sensitive to variance/dispersion in the data. Therefore, we need to run a homogeneity of dispersion test to test for the sensitivity of our PERMANOVA results to variance.

# Homogeneity of Disperson test with beta dispr
# Sorensen 
beta_soren_diet <- betadisper(scaled_sorensen_dist, metadata$diet)
permutest(beta_soren_diet)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)    
## Groups     2 0.35638 0.178190 13.594    999  0.001 ***
## Residuals 80 1.04867 0.013108                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Bray-curtis 
beta_bray_diet <- betadisper(scaled_bray_dist, metadata$diet)
permutest(beta_bray_diet)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq Mean Sq      F N.Perm Pr(>F)    
## Groups     2 0.67166 0.33583 34.258    999  0.001 ***
## Residuals 80 0.78425 0.00980                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Weighted Unifrac 
weighted_unifrac_diet <- betadisper(scaled_wUnifrac_dist, metadata$diet)
permutest(beta_bray_diet)
## 
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 999
## 
## Response: Distances
##           Df  Sum Sq Mean Sq      F N.Perm Pr(>F)    
## Groups     2 0.67166 0.33583 34.258    999  0.001 ***
## Residuals 80 0.78425 0.00980                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Session Information

For Reproducibility

#Ensure reproducibility
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.2 (2023-10-31)
##  os       macOS Sonoma 14.6.1
##  system   x86_64, darwin20
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       America/New_York
##  date     2024-09-05
##  pandoc   3.1.11 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/x86_64/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package          * version    date (UTC) lib source
##  abind              1.4-5      2016-07-21 [2] CRAN (R 4.3.0)
##  ade4               1.7-22     2023-02-06 [2] CRAN (R 4.3.0)
##  ape                5.7-1      2023-03-13 [2] CRAN (R 4.3.0)
##  backports          1.4.1      2021-12-13 [2] CRAN (R 4.3.0)
##  Biobase            2.62.0     2023-10-24 [2] Bioconductor
##  BiocGenerics       0.48.1     2023-11-01 [2] Bioconductor
##  biomformat         1.30.0     2023-10-24 [2] Bioconductor
##  Biostrings         2.70.2     2024-01-28 [2] Bioconductor 3.18 (R 4.3.2)
##  bitops             1.0-7      2021-04-24 [2] CRAN (R 4.3.0)
##  broom              1.0.5      2023-06-09 [2] CRAN (R 4.3.0)
##  bslib              0.6.1      2023-11-28 [2] CRAN (R 4.3.0)
##  cachem             1.0.8      2023-05-01 [2] CRAN (R 4.3.0)
##  car                3.1-2      2023-03-30 [1] CRAN (R 4.3.0)
##  carData            3.0-5      2022-01-06 [1] CRAN (R 4.3.0)
##  cli                3.6.2      2023-12-11 [2] CRAN (R 4.3.0)
##  cluster            2.1.6      2023-12-01 [2] CRAN (R 4.3.0)
##  codetools          0.2-19     2023-02-01 [2] CRAN (R 4.3.2)
##  colorspace         2.1-0      2023-01-23 [2] CRAN (R 4.3.0)
##  crayon             1.5.2      2022-09-29 [2] CRAN (R 4.3.0)
##  data.table         1.15.0     2024-01-30 [2] CRAN (R 4.3.2)
##  devtools         * 2.4.5      2022-10-11 [2] CRAN (R 4.3.0)
##  digest             0.6.34     2024-01-11 [2] CRAN (R 4.3.0)
##  dplyr            * 1.1.4      2023-11-17 [2] CRAN (R 4.3.0)
##  ellipsis           0.3.2      2021-04-29 [2] CRAN (R 4.3.0)
##  evaluate           0.23       2023-11-01 [2] CRAN (R 4.3.0)
##  fansi              1.0.6      2023-12-08 [2] CRAN (R 4.3.0)
##  farver             2.1.1      2022-07-06 [2] CRAN (R 4.3.0)
##  fastmap            1.1.1      2023-02-24 [2] CRAN (R 4.3.0)
##  forcats          * 1.0.0      2023-01-29 [2] CRAN (R 4.3.0)
##  foreach            1.5.2      2022-02-02 [2] CRAN (R 4.3.0)
##  fs                 1.6.3      2023-07-20 [2] CRAN (R 4.3.0)
##  generics           0.1.3      2022-07-05 [2] CRAN (R 4.3.0)
##  GenomeInfoDb       1.38.6     2024-02-08 [2] Bioconductor 3.18 (R 4.3.2)
##  GenomeInfoDbData   1.2.11     2023-11-13 [2] Bioconductor
##  ggplot2          * 3.4.4      2023-10-12 [1] CRAN (R 4.3.0)
##  ggpubr           * 0.6.0.999  2024-07-30 [1] Github (kassambara/ggpubr@6aeb4f7)
##  ggsignif           0.6.4      2022-10-13 [1] CRAN (R 4.3.0)
##  glue               1.7.0      2024-01-09 [1] CRAN (R 4.3.0)
##  gtable             0.3.4      2023-08-21 [2] CRAN (R 4.3.0)
##  highr              0.10       2022-12-22 [2] CRAN (R 4.3.0)
##  hms                1.1.3      2023-03-21 [2] CRAN (R 4.3.0)
##  htmltools          0.5.7      2023-11-03 [2] CRAN (R 4.3.0)
##  htmlwidgets        1.6.4      2023-12-06 [2] CRAN (R 4.3.0)
##  httpuv             1.6.14     2024-01-26 [2] CRAN (R 4.3.2)
##  igraph             2.0.1.1    2024-01-30 [2] CRAN (R 4.3.2)
##  IRanges            2.36.0     2023-10-24 [2] Bioconductor
##  iterators          1.0.14     2022-02-05 [2] CRAN (R 4.3.0)
##  jquerylib          0.1.4      2021-04-26 [2] CRAN (R 4.3.0)
##  jsonlite           1.8.8      2023-12-04 [2] CRAN (R 4.3.0)
##  knitr              1.45       2023-10-30 [2] CRAN (R 4.3.0)
##  labeling           0.4.3      2023-08-29 [2] CRAN (R 4.3.0)
##  later              1.3.2      2023-12-06 [2] CRAN (R 4.3.0)
##  lattice          * 0.22-5     2023-10-24 [2] CRAN (R 4.3.0)
##  lifecycle          1.0.4      2023-11-07 [2] CRAN (R 4.3.0)
##  lubridate        * 1.9.3      2023-09-27 [2] CRAN (R 4.3.0)
##  magrittr           2.0.3      2022-03-30 [2] CRAN (R 4.3.0)
##  MASS               7.3-60.0.1 2024-01-13 [2] CRAN (R 4.3.0)
##  Matrix             1.6-5      2024-01-11 [2] CRAN (R 4.3.0)
##  memoise            2.0.1      2021-11-26 [2] CRAN (R 4.3.0)
##  mgcv               1.9-1      2023-12-21 [2] CRAN (R 4.3.0)
##  mime               0.12       2021-09-28 [2] CRAN (R 4.3.0)
##  miniUI             0.1.1.1    2018-05-18 [2] CRAN (R 4.3.0)
##  multtest           2.58.0     2023-10-24 [2] Bioconductor
##  munsell            0.5.0      2018-06-12 [2] CRAN (R 4.3.0)
##  nlme               3.1-164    2023-11-27 [2] CRAN (R 4.3.0)
##  pacman             0.5.1      2019-03-11 [1] CRAN (R 4.3.0)
##  patchwork        * 1.2.0.9000 2024-05-07 [1] Github (thomasp85/patchwork@d943757)
##  permute          * 0.9-7      2022-01-27 [2] CRAN (R 4.3.0)
##  phyloseq         * 1.46.0     2023-10-24 [2] Bioconductor
##  pillar             1.9.0      2023-03-22 [2] CRAN (R 4.3.0)
##  pkgbuild           1.4.3      2023-12-10 [2] CRAN (R 4.3.0)
##  pkgconfig          2.0.3      2019-09-22 [2] CRAN (R 4.3.0)
##  pkgload            1.3.4      2024-01-16 [2] CRAN (R 4.3.0)
##  plyr               1.8.9      2023-10-02 [1] CRAN (R 4.3.0)
##  profvis            0.3.8      2023-05-02 [2] CRAN (R 4.3.0)
##  promises           1.2.1      2023-08-10 [2] CRAN (R 4.3.0)
##  purrr            * 1.0.2      2023-08-10 [2] CRAN (R 4.3.0)
##  R6                 2.5.1      2021-08-19 [2] CRAN (R 4.3.0)
##  Rcpp               1.0.12     2024-01-09 [2] CRAN (R 4.3.0)
##  RCurl              1.98-1.14  2024-01-09 [2] CRAN (R 4.3.0)
##  readr            * 2.1.5      2024-01-10 [1] CRAN (R 4.3.0)
##  remotes            2.4.2.1    2023-07-18 [2] CRAN (R 4.3.0)
##  reshape2           1.4.4      2020-04-09 [2] CRAN (R 4.3.0)
##  rhdf5              2.46.1     2023-11-29 [2] Bioconductor
##  rhdf5filters       1.14.1     2023-11-06 [2] Bioconductor
##  Rhdf5lib           1.24.2     2024-02-07 [2] Bioconductor 3.18 (R 4.3.2)
##  rlang              1.1.3      2024-01-10 [2] CRAN (R 4.3.0)
##  rmarkdown          2.25       2023-09-18 [2] CRAN (R 4.3.0)
##  rstatix          * 0.7.2      2023-02-01 [1] CRAN (R 4.3.0)
##  rstudioapi         0.15.0     2023-07-07 [2] CRAN (R 4.3.0)
##  S4Vectors          0.40.2     2023-11-23 [2] Bioconductor
##  sass               0.4.8      2023-12-06 [2] CRAN (R 4.3.0)
##  scales             1.3.0      2023-11-28 [2] CRAN (R 4.3.0)
##  sessioninfo        1.2.2      2021-12-06 [2] CRAN (R 4.3.0)
##  shiny              1.8.0      2023-11-17 [2] CRAN (R 4.3.0)
##  stringi            1.8.3      2023-12-11 [2] CRAN (R 4.3.0)
##  stringr          * 1.5.1      2023-11-14 [2] CRAN (R 4.3.0)
##  survival           3.5-7      2023-08-14 [2] CRAN (R 4.3.0)
##  tibble           * 3.2.1      2023-03-20 [2] CRAN (R 4.3.0)
##  tidyr            * 1.3.1      2024-01-24 [1] CRAN (R 4.3.2)
##  tidyselect         1.2.0      2022-10-10 [2] CRAN (R 4.3.0)
##  tidyverse        * 2.0.0      2023-02-22 [1] CRAN (R 4.3.0)
##  timechange         0.3.0      2024-01-18 [2] CRAN (R 4.3.0)
##  tzdb               0.4.0      2023-05-12 [2] CRAN (R 4.3.0)
##  urlchecker         1.0.1      2021-11-30 [2] CRAN (R 4.3.0)
##  usethis          * 2.2.2      2023-07-06 [2] CRAN (R 4.3.0)
##  utf8               1.2.4      2023-10-22 [2] CRAN (R 4.3.0)
##  vctrs              0.6.5      2023-12-01 [2] CRAN (R 4.3.0)
##  vegan            * 2.6-4      2022-10-11 [1] CRAN (R 4.3.0)
##  withr              3.0.0      2024-01-16 [2] CRAN (R 4.3.0)
##  xfun               0.42       2024-02-08 [2] CRAN (R 4.3.2)
##  xtable             1.8-4      2019-04-21 [2] CRAN (R 4.3.0)
##  XVector            0.42.0     2023-10-24 [2] Bioconductor
##  yaml               2.3.8      2023-12-11 [2] CRAN (R 4.3.0)
##  zlibbioc           1.48.0     2023-10-24 [2] Bioconductor
## 
##  [1] /Users/cab565/Library/R/x86_64/4.3/library
##  [2] /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library
## 
## ──────────────────────────────────────────────────────────────────────────────